Group Members: Reid Brown, Cory Carr, Tappy Li, Kesia Ohene-Agyeman

library(leaps)
library(corrplot)
library(readr)
library(mosaic)
library(car)
library(dplyr)
library(Stat2Data)

# Load the Ames training data and the two class helper scripts.
AmesData <- read_csv("/Users/reidbrown/Documents/Senior/Spring 2020/STOR 455/Class Example Data/AmesTrain12.csv")
source("/Users/reidbrown/Documents/Senior/Spring 2020/STOR 455/Class Scripts/ShowSubsets.R")
source("/Users/reidbrown/Documents/Senior/Spring 2020/STOR 455/Class Scripts/anova455.R")
head(AmesData)

# Keep only the numeric columns, then keep columns 2 through 27 of those.
# This drops the first numeric column, which is "Order".
MyAmesData <- select(AmesData, which(sapply(AmesData, is.numeric)))[, 2:27]
print(MyAmesData)

Part 1

# Backward selection: start from the model containing every column of
# MyAmesData as a predictor of Price and let step() remove terms.
# MSE is the squared residual standard error of that full model; it is passed
# to step() as `scale`.
Full <- lm(Price ~ ., data = MyAmesData)
MSE <- summary(Full)$sigma^2
BackwardMod1 <- step(Full, scale = MSE, trace = FALSE)
print(BackwardMod1)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + FirstSF + 
##     SecondSF + Bedroom + TotalRooms + Fireplaces + GarageSF + 
##     OpenPorchSF + EnclosedPorchSF + ScreenPorchSF, data = MyAmesData)
## 
## Coefficients:
##     (Intercept)      LotFrontage          LotArea          Quality  
##      -1.405e+03        1.452e-01        5.042e-04        1.654e+01  
##       Condition        YearBuilt      YearRemodel    BasementFinSF  
##       6.716e+00        5.232e-01        1.361e-01        2.237e-02  
##      BasementSF          FirstSF         SecondSF          Bedroom  
##       1.608e-02        4.875e-02        4.212e-02       -8.235e+00  
##      TotalRooms       Fireplaces         GarageSF      OpenPorchSF  
##       5.241e+00        4.642e+00        3.809e-02        4.126e-02  
## EnclosedPorchSF    ScreenPorchSF  
##       3.773e-02        3.854e-02
# Forward selection: start from the intercept-only model and let step() add
# terms, with the full model as the upper bound of the search.
none <- lm(Price ~ 1, data = MyAmesData)
ForwardMod1 <- step(
  none,
  scope = list(upper = Full),
  scale = MSE,
  direction = "forward",
  trace = FALSE
)
print(ForwardMod1)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     GarageSF + LotArea + Condition + BasementSF + LotFrontage + 
##     Bedroom + TotalRooms + Fireplaces + OpenPorchSF + YearRemodel + 
##     ScreenPorchSF + EnclosedPorchSF, data = MyAmesData)
## 
## Coefficients:
##     (Intercept)          Quality         GroundSF    BasementFinSF  
##      -1.432e+03        1.622e+01        4.288e-02        2.260e-02  
##       YearBuilt         GarageSF          LotArea        Condition  
##       5.315e-01        4.098e-02        5.204e-04        6.690e+00  
##      BasementSF      LotFrontage          Bedroom       TotalRooms  
##       1.956e-02        1.472e-01       -8.206e+00        5.043e+00  
##      Fireplaces      OpenPorchSF      YearRemodel    ScreenPorchSF  
##       5.318e+00        4.179e-02        1.437e-01        3.955e-02  
## EnclosedPorchSF  
##       3.920e-02
# Stepwise selection: same starting model and upper scope as the forward
# search, but `direction` is left at its default so terms can be added or
# dropped at each step.
StepwiseMod1 <- step(
  none,
  scope = list(upper = Full),
  scale = MSE,
  trace = FALSE
)
print(StepwiseMod1)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     GarageSF + LotArea + Condition + BasementSF + LotFrontage + 
##     Bedroom + TotalRooms + Fireplaces + OpenPorchSF + YearRemodel + 
##     ScreenPorchSF + EnclosedPorchSF, data = MyAmesData)
## 
## Coefficients:
##     (Intercept)          Quality         GroundSF    BasementFinSF  
##      -1.432e+03        1.622e+01        4.288e-02        2.260e-02  
##       YearBuilt         GarageSF          LotArea        Condition  
##       5.315e-01        4.098e-02        5.204e-04        6.690e+00  
##      BasementSF      LotFrontage          Bedroom       TotalRooms  
##       1.956e-02        1.472e-01       -8.206e+00        5.043e+00  
##      Fireplaces      OpenPorchSF      YearRemodel    ScreenPorchSF  
##       5.318e+00        4.179e-02        1.437e-01        3.955e-02  
## EnclosedPorchSF  
##       3.920e-02
# Summaries of each model (coefficient table, residual standard error,
# R-squared), starting with the backward-selection model.
summary(BackwardMod1)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + FirstSF + 
##     SecondSF + Bedroom + TotalRooms + Fireplaces + GarageSF + 
##     OpenPorchSF + EnclosedPorchSF + ScreenPorchSF, data = MyAmesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -153.278  -16.024   -2.495   11.940  138.810 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.405e+03  1.463e+02  -9.602  < 2e-16 ***
## LotFrontage      1.452e-01  3.570e-02   4.066 5.44e-05 ***
## LotArea          5.042e-04  9.148e-05   5.512 5.33e-08 ***
## Quality          1.654e+01  1.392e+00  11.879  < 2e-16 ***
## Condition        6.716e+00  1.183e+00   5.677 2.16e-08 ***
## YearBuilt        5.232e-01  6.543e-02   7.996 6.95e-15 ***
## YearRemodel      1.361e-01  7.795e-02   1.746 0.081384 .  
## BasementFinSF    2.237e-02  3.042e-03   7.353 6.60e-13 ***
## BasementSF       1.608e-02  5.108e-03   3.149 0.001722 ** 
## FirstSF          4.875e-02  6.376e-03   7.646 8.59e-14 ***
## SecondSF         4.212e-02  5.187e-03   8.120 2.79e-15 ***
## Bedroom         -8.235e+00  2.068e+00  -3.982 7.70e-05 ***
## TotalRooms       5.241e+00  1.514e+00   3.461 0.000577 ***
## Fireplaces       4.642e+00  2.258e+00   2.056 0.040265 *  
## GarageSF         3.809e-02  6.796e-03   5.604 3.24e-08 ***
## OpenPorchSF      4.126e-02  1.737e-02   2.376 0.017839 *  
## EnclosedPorchSF  3.773e-02  1.989e-02   1.897 0.058326 .  
## ScreenPorchSF    3.854e-02  1.962e-02   1.964 0.049964 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.28 on 582 degrees of freedom
## Multiple R-squared:  0.8737, Adjusted R-squared:   0.87 
## F-statistic: 236.8 on 17 and 582 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for the forward-selection model.
summary(ForwardMod1)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     GarageSF + LotArea + Condition + BasementSF + LotFrontage + 
##     Bedroom + TotalRooms + Fireplaces + OpenPorchSF + YearRemodel + 
##     ScreenPorchSF + EnclosedPorchSF, data = MyAmesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -151.163  -15.762   -2.585   11.901  137.973 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.432e+03  1.465e+02  -9.777  < 2e-16 ***
## Quality          1.622e+01  1.382e+00  11.731  < 2e-16 ***
## GroundSF         4.288e-02  5.057e-03   8.481  < 2e-16 ***
## BasementFinSF    2.260e-02  3.052e-03   7.406 4.59e-13 ***
## YearBuilt        5.315e-01  6.589e-02   8.066 4.15e-15 ***
## GarageSF         4.098e-02  6.731e-03   6.089 2.06e-09 ***
## LotArea          5.204e-04  9.149e-05   5.688 2.04e-08 ***
## Condition        6.690e+00  1.188e+00   5.633 2.76e-08 ***
## BasementSF       1.956e-02  4.021e-03   4.864 1.49e-06 ***
## LotFrontage      1.472e-01  3.573e-02   4.120 4.33e-05 ***
## Bedroom         -8.206e+00  2.065e+00  -3.975 7.93e-05 ***
## TotalRooms       5.043e+00  1.545e+00   3.265  0.00116 ** 
## Fireplaces       5.318e+00  2.235e+00   2.380  0.01763 *  
## OpenPorchSF      4.179e-02  1.743e-02   2.397  0.01686 *  
## YearRemodel      1.437e-01  7.812e-02   1.839  0.06640 .  
## ScreenPorchSF    3.955e-02  1.968e-02   2.009  0.04497 *  
## EnclosedPorchSF  3.920e-02  1.989e-02   1.970  0.04928 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.39 on 583 degrees of freedom
## Multiple R-squared:  0.8725, Adjusted R-squared:  0.869 
## F-statistic: 249.4 on 16 and 583 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for the stepwise-selection model.
summary(StepwiseMod1)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     GarageSF + LotArea + Condition + BasementSF + LotFrontage + 
##     Bedroom + TotalRooms + Fireplaces + OpenPorchSF + YearRemodel + 
##     ScreenPorchSF + EnclosedPorchSF, data = MyAmesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -151.163  -15.762   -2.585   11.901  137.973 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.432e+03  1.465e+02  -9.777  < 2e-16 ***
## Quality          1.622e+01  1.382e+00  11.731  < 2e-16 ***
## GroundSF         4.288e-02  5.057e-03   8.481  < 2e-16 ***
## BasementFinSF    2.260e-02  3.052e-03   7.406 4.59e-13 ***
## YearBuilt        5.315e-01  6.589e-02   8.066 4.15e-15 ***
## GarageSF         4.098e-02  6.731e-03   6.089 2.06e-09 ***
## LotArea          5.204e-04  9.149e-05   5.688 2.04e-08 ***
## Condition        6.690e+00  1.188e+00   5.633 2.76e-08 ***
## BasementSF       1.956e-02  4.021e-03   4.864 1.49e-06 ***
## LotFrontage      1.472e-01  3.573e-02   4.120 4.33e-05 ***
## Bedroom         -8.206e+00  2.065e+00  -3.975 7.93e-05 ***
## TotalRooms       5.043e+00  1.545e+00   3.265  0.00116 ** 
## Fireplaces       5.318e+00  2.235e+00   2.380  0.01763 *  
## OpenPorchSF      4.179e-02  1.743e-02   2.397  0.01686 *  
## YearRemodel      1.437e-01  7.812e-02   1.839  0.06640 .  
## ScreenPorchSF    3.955e-02  1.968e-02   2.009  0.04497 *  
## EnclosedPorchSF  3.920e-02  1.989e-02   1.970  0.04928 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.39 on 583 degrees of freedom
## Multiple R-squared:  0.8725, Adjusted R-squared:  0.869 
## F-statistic: 249.4 on 16 and 583 DF,  p-value: < 2.2e-16
# Variance inflation factors for the backward-selection model's predictors.
vif(BackwardMod1)
##     LotFrontage         LotArea         Quality       Condition 
##        1.083407        1.163735        3.093569        1.483131 
##       YearBuilt     YearRemodel   BasementFinSF      BasementSF 
##        3.367110        2.158414        1.415640        3.260448 
##         FirstSF        SecondSF         Bedroom      TotalRooms 
##        4.428293        4.115268        2.291218        4.321898 
##      Fireplaces        GarageSF     OpenPorchSF EnclosedPorchSF 
##        1.544143        1.737130        1.171583        1.406599 
##   ScreenPorchSF 
##        1.096568
# Variance inflation factors for the forward-selection model's predictors.
vif(ForwardMod1)
##         Quality        GroundSF   BasementFinSF       YearBuilt 
##        3.027375        5.157769        1.414244        3.389425 
##        GarageSF         LotArea       Condition      BasementSF 
##        1.690700        1.155283        1.483580        2.005624 
##     LotFrontage         Bedroom      TotalRooms      Fireplaces 
##        1.076635        2.266101        4.463418        1.500369 
##     OpenPorchSF     YearRemodel   ScreenPorchSF EnclosedPorchSF 
##        1.171602        2.151402        1.095027        1.396489
# Left disabled: StepwiseMod1 was fitted with the same formula as ForwardMod1
# (see the printed calls), so its VIFs would repeat the ones just shown.
#vif(StepwiseMod1)

We conducted backward selection, forward selection, and stepwise selection on the quantitative predictor variables in the Ames12 dataset. The backward selection method returned 17 predictors, while the forward and stepwise selection methods each returned only 16 predictors. The models produced by forward and stepwise selection wound up being exactly the same. In the backward model, the adjusted R^2 value is 0.870, there are two insignificant predictors at the 0.05 level (YearRemodel and EnclosedPorchSF), and there are no predictors with a VIF value above 5; however, two are very close to this cutoff (FirstSF=4.428293 and SecondSF=4.115268). In the forward selection model, the adjusted R^2 value is 0.869, there is only one insignificant predictor (YearRemodel), and there is one predictor with a VIF above 5 (GroundSF=5.157769). We think that the forward selection model is best because there is only a 0.001 decrease in adjusted R^2, but one fewer insignificant predictor.

Part 2

# Standard lm diagnostic plots for the forward-selection model.
plot(ForwardMod1)

# Histogram of the model's raw residuals.
hist(ForwardMod1$residuals)

# Magnitude of the largest raw residual (in absolute value).
max(abs(ForwardMod1$residuals))
## [1] 151.1633
# Name and position of the observation with the largest absolute raw residual.
which.max(abs(ForwardMod1$residuals))
## 320 
## 320
# Standardized residual of the observation with the largest absolute raw
# residual. The observation is looked up from the model instead of being
# hard-coded as row 320, so the line stays correct if the data or the fitted
# model changes.
rstandard(ForwardMod1)[which.max(abs(ForwardMod1$residuals))]
##       320 
## -5.682328
# Studentized residual of the observation with the largest absolute raw
# residual. The observation is looked up from the model instead of being
# hard-coded as row 320, so the line stays correct if the data or the fitted
# model changes.
rstudent(ForwardMod1)[which.max(abs(ForwardMod1$residuals))]
##      320 
## -5.84152
# Studentized residuals against fitted values, with a horizontal reference
# line at zero (intercept 0, slope 0).
plot(rstudent(ForwardMod1)~ForwardMod1$fitted.values, data = MyAmesData)
abline(0,0)

# Same plot using standardized residuals.
plot(rstandard(ForwardMod1)~ForwardMod1$fitted.values, data = MyAmesData)
abline(0,0)

# Re-print the model summary for reference while reading the diagnostics.
summary(ForwardMod1)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     GarageSF + LotArea + Condition + BasementSF + LotFrontage + 
##     Bedroom + TotalRooms + Fireplaces + OpenPorchSF + YearRemodel + 
##     ScreenPorchSF + EnclosedPorchSF, data = MyAmesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -151.163  -15.762   -2.585   11.901  137.973 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.432e+03  1.465e+02  -9.777  < 2e-16 ***
## Quality          1.622e+01  1.382e+00  11.731  < 2e-16 ***
## GroundSF         4.288e-02  5.057e-03   8.481  < 2e-16 ***
## BasementFinSF    2.260e-02  3.052e-03   7.406 4.59e-13 ***
## YearBuilt        5.315e-01  6.589e-02   8.066 4.15e-15 ***
## GarageSF         4.098e-02  6.731e-03   6.089 2.06e-09 ***
## LotArea          5.204e-04  9.149e-05   5.688 2.04e-08 ***
## Condition        6.690e+00  1.188e+00   5.633 2.76e-08 ***
## BasementSF       1.956e-02  4.021e-03   4.864 1.49e-06 ***
## LotFrontage      1.472e-01  3.573e-02   4.120 4.33e-05 ***
## Bedroom         -8.206e+00  2.065e+00  -3.975 7.93e-05 ***
## TotalRooms       5.043e+00  1.545e+00   3.265  0.00116 ** 
## Fireplaces       5.318e+00  2.235e+00   2.380  0.01763 *  
## OpenPorchSF      4.179e-02  1.743e-02   2.397  0.01686 *  
## YearRemodel      1.437e-01  7.812e-02   1.839  0.06640 .  
## ScreenPorchSF    3.955e-02  1.968e-02   2.009  0.04497 *  
## EnclosedPorchSF  3.920e-02  1.989e-02   1.970  0.04928 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.39 on 583 degrees of freedom
## Multiple R-squared:  0.8725, Adjusted R-squared:  0.869 
## F-statistic: 249.4 on 16 and 583 DF,  p-value: < 2.2e-16
# Plot Price against the predictors of the forward-selection model, using the
# same formula that was printed in the model's call.
plot(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
    GarageSF + LotArea + Condition + BasementSF + LotFrontage + 
    Bedroom + TotalRooms + Fireplaces + OpenPorchSF + YearRemodel + 
    ScreenPorchSF + EnclosedPorchSF, data = MyAmesData)

Looking at the price vs our predictors shows a fairly linear relationship across most predictors. In the cases where it isn’t linear, there are no better curves we could think of including. Linearity is satisfied.

After looking at the residuals vs leverage plot, none of the residuals have large leverage because none of them go past the Cook’s Distance boundaries. Because of this, we are not removing any data points. The raw, standardized, and studentized residual plots all show that the residuals have zero mean because they all appear to be centered around the reference line. The QQ plot follows the reference line well, which indicates normality. The residuals vs fitted plot demonstrates that constant variance is satisfied since there is no apparent fan shape. However, the residuals vs fitted plot shows that independence does not appear to be satisfied, as there is a fairly clear pattern (with superimposed curve) to the residuals. This will not be fixed by adding or removing variables; it will only be fixed by adding a transformation, so we are leaving it be for now and acknowledging that this is not an ideal linear model.

Part 3

# Build MyAmesData2: the original columns, a combined porch area, a
# log(x + 1) companion for every predictor, and 0/1 porch indicators.
MyAmesData2 <- MyAmesData
MyAmesData2$PorchSF <- MyAmesData2$ScreenPorchSF + MyAmesData2$EnclosedPorchSF

# Column 1 is skipped (presumably the response, Price -- it is the column
# later kept as-is via MyAmesData2[1]). Every other column present at this
# point gets a log(x + 1) copy appended after the existing columns. The
# bounds are derived from the column count instead of the literals 2:27 and
# 26, so the loop cannot drift out of sync with the data; with the current
# 27 columns the result is unchanged.
n_base <- ncol(MyAmesData2)
for (i in 2:n_base) {
  MyAmesData2[n_base - 1 + i] <- log(MyAmesData2[i] + 1)
}

# 1 when the house has any porch area of that kind, 0 otherwise.
MyAmesData2$ScreenPorchYN <- (MyAmesData2$ScreenPorchSF != 0) * 1
MyAmesData2$EnclosedPorchYN <- (MyAmesData2$EnclosedPorchSF != 0) * 1
MyAmesData2$PorchYN <- (MyAmesData2$PorchSF != 0) * 1
# Backward selection on the expanded data set. MSE is overwritten here with
# the squared residual standard error of the new full model.
Full2 <- lm(Price ~ ., data = MyAmesData2)
MSE <- summary(Full2)$sigma^2
BackwardMod2 <- step(Full2, scale = MSE, trace = FALSE)
print(BackwardMod2)
## 
## Call:
## lm(formula = Price ~ LotArea + Quality + Condition + YearBuilt + 
##     YearRemodel + BasementFinSF + BasementUnFinSF + BasementSF + 
##     FirstSF + SecondSF + GroundSF + BasementFBath + HalfBath + 
##     Bedroom + TotalRooms + GarageCars + OpenPorchSF + ScreenPorchSF + 
##     LotFrontage.1 + LotArea.1 + Quality.1 + YearRemodel.1 + BasementFinSF.1 + 
##     GroundSF.1 + BasementFBath.1 + HalfBath.1 + Bedroom.1 + Fireplaces.1 + 
##     GarageCars.1 + GarageSF.1 + WoodDeckSF.1 + EnclosedPorchSF.1 + 
##     ScreenPorchSF.1 + ScreenPorchYN + EnclosedPorchYN, data = MyAmesData2)
## 
## Coefficients:
##       (Intercept)            LotArea            Quality  
##         1.302e+06          1.829e-04          3.920e+01  
##         Condition          YearBuilt        YearRemodel  
##         7.654e+00          5.061e-01          9.998e+01  
##     BasementFinSF    BasementUnFinSF         BasementSF  
##         1.852e-02         -1.185e-02          3.338e-02  
##           FirstSF           SecondSF           GroundSF  
##         4.340e-02          3.992e-02          5.063e-02  
##     BasementFBath           HalfBath            Bedroom  
##         6.971e+01         -1.207e+02         -1.587e+01  
##        TotalRooms         GarageCars        OpenPorchSF  
##         3.669e+00          5.673e+01          4.170e-02  
##     ScreenPorchSF      LotFrontage.1          LotArea.1  
##        -1.940e-01          1.356e+00          9.032e+00  
##         Quality.1      YearRemodel.1    BasementFinSF.1  
##        -1.498e+02         -1.977e+05         -1.194e+00  
##        GroundSF.1    BasementFBath.1         HalfBath.1  
##        -8.371e+01         -9.977e+01          1.842e+02  
##         Bedroom.1       Fireplaces.1       GarageCars.1  
##         4.612e+01          9.974e+00         -1.548e+02  
##        GarageSF.1       WoodDeckSF.1  EnclosedPorchSF.1  
##         9.881e+00          7.201e-01          8.166e+00  
##   ScreenPorchSF.1      ScreenPorchYN    EnclosedPorchYN  
##         3.989e+01         -1.601e+02         -3.910e+01
# Forward selection on the expanded data set, growing from the
# intercept-only model toward Full2.
none2 <- lm(Price ~ 1, data = MyAmesData2)
ForwardMod2 <- step(
  none2,
  scope = list(upper = Full2),
  scale = MSE,
  direction = "forward",
  trace = FALSE
)
print(ForwardMod2)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     LotArea.1 + Quality.1 + Condition.1 + BasementSF + GroundSF.1 + 
##     GarageCars + PorchSF + YearBuilt.1 + Fireplaces + LotFrontage.1 + 
##     OpenPorchSF + LotArea + TotalRooms.1 + Condition + FullBath + 
##     BasementFinSF.1 + BasementUnFinSF + BasementUnFinSF.1 + BasementSF.1 + 
##     GarageSF + WoodDeckSF + GarageCars.1 + GarageSF.1 + Bedroom + 
##     Bedroom.1 + TotalRooms, data = MyAmesData2)
## 
## Coefficients:
##       (Intercept)            Quality           GroundSF  
##         2.421e+05          3.946e+01          7.707e-02  
##     BasementFinSF          YearBuilt          LotArea.1  
##         1.642e-02          1.955e+01          1.034e+01  
##         Quality.1        Condition.1         BasementSF  
##        -1.531e+02         -2.213e+01          4.130e-02  
##        GroundSF.1         GarageCars            PorchSF  
##        -6.046e+01          5.709e+01          3.796e-02  
##       YearBuilt.1         Fireplaces      LotFrontage.1  
##        -3.693e+04          6.974e+00          1.865e+00  
##       OpenPorchSF            LotArea       TotalRooms.1  
##         4.818e-02          2.715e-04         -5.687e+01  
##         Condition           FullBath    BasementFinSF.1  
##         1.236e+01         -5.869e+00         -1.528e+00  
##   BasementUnFinSF  BasementUnFinSF.1       BasementSF.1  
##        -1.999e-02          1.965e+00         -3.380e+00  
##          GarageSF         WoodDeckSF       GarageCars.1  
##        -2.672e-03          8.927e-03         -1.552e+02  
##        GarageSF.1            Bedroom          Bedroom.1  
##         1.117e+01         -1.815e+01          5.088e+01  
##        TotalRooms  
##         1.174e+01
# Stepwise selection on the expanded data set (default `direction`, so terms
# may be added or dropped at each step).
StepwiseMod2 <- step(
  none2,
  scope = list(upper = Full2),
  scale = MSE,
  trace = FALSE
)
print(StepwiseMod2)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     LotArea.1 + Quality.1 + BasementSF + GroundSF.1 + PorchSF + 
##     YearBuilt.1 + Fireplaces + LotFrontage.1 + OpenPorchSF + 
##     LotArea + TotalRooms.1 + Condition + FullBath + BasementFinSF.1 + 
##     BasementUnFinSF + BasementUnFinSF.1 + BasementSF.1 + GarageSF + 
##     WoodDeckSF + EnclosedPorchYN + EnclosedPorchSF.1 + ScreenPorchSF.1 + 
##     ScreenPorchYN, data = MyAmesData2)
## 
## Coefficients:
##       (Intercept)            Quality           GroundSF  
##         3.040e+05          4.446e+01          9.782e-02  
##     BasementFinSF          YearBuilt          LotArea.1  
##         1.427e-02          2.428e+01          9.086e+00  
##         Quality.1         BasementSF         GroundSF.1  
##        -1.801e+02          4.317e-02         -9.261e+01  
##           PorchSF        YearBuilt.1         Fireplaces  
##        -1.734e-01         -4.631e+04          7.130e+00  
##     LotFrontage.1        OpenPorchSF            LotArea  
##         2.201e+00          4.213e-02          2.826e-04  
##      TotalRooms.1          Condition           FullBath  
##         2.258e+01          8.800e+00         -7.515e+00  
##   BasementFinSF.1    BasementUnFinSF  BasementUnFinSF.1  
##        -1.613e+00         -2.296e-02          2.665e+00  
##      BasementSF.1           GarageSF         WoodDeckSF  
##        -3.522e+00          2.694e-02          1.422e-02  
##   EnclosedPorchYN  EnclosedPorchSF.1    ScreenPorchSF.1  
##        -1.294e+02          3.252e+01          3.378e+01  
##     ScreenPorchYN  
##        -1.307e+02
# Coefficient table and fit statistics for the stepwise model on MyAmesData2.
summary(StepwiseMod2)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     LotArea.1 + Quality.1 + BasementSF + GroundSF.1 + PorchSF + 
##     YearBuilt.1 + Fireplaces + LotFrontage.1 + OpenPorchSF + 
##     LotArea + TotalRooms.1 + Condition + FullBath + BasementFinSF.1 + 
##     BasementUnFinSF + BasementUnFinSF.1 + BasementSF.1 + GarageSF + 
##     WoodDeckSF + EnclosedPorchYN + EnclosedPorchSF.1 + ScreenPorchSF.1 + 
##     ScreenPorchYN, data = MyAmesData2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -122.548  -13.934   -1.971   12.599  100.480 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        3.040e+05  6.462e+04   4.705 3.19e-06 ***
## Quality            4.446e+01  4.673e+00   9.513  < 2e-16 ***
## GroundSF           9.782e-02  9.285e-03  10.535  < 2e-16 ***
## BasementFinSF      1.427e-02  7.374e-03   1.935 0.053424 .  
## YearBuilt          2.428e+01  5.026e+00   4.831 1.75e-06 ***
## LotArea.1          9.086e+00  2.990e+00   3.038 0.002488 ** 
## Quality.1         -1.801e+02  2.968e+01  -6.068 2.36e-09 ***
## BasementSF         4.317e-02  8.264e-03   5.223 2.46e-07 ***
## GroundSF.1        -9.261e+01  1.489e+01  -6.221 9.53e-10 ***
## PorchSF           -1.734e-01  6.985e-02  -2.483 0.013320 *  
## YearBuilt.1       -4.631e+04  9.823e+03  -4.714 3.05e-06 ***
## Fireplaces         7.130e+00  2.039e+00   3.497 0.000506 ***
## LotFrontage.1      2.201e+00  6.499e-01   3.387 0.000755 ***
## OpenPorchSF        4.213e-02  1.567e-02   2.688 0.007395 ** 
## LotArea            2.826e-04  1.073e-04   2.635 0.008648 ** 
## TotalRooms.1       2.258e+01  9.425e+00   2.396 0.016901 *  
## Condition          8.800e+00  1.038e+00   8.475  < 2e-16 ***
## FullBath          -7.515e+00  2.893e+00  -2.597 0.009636 ** 
## BasementFinSF.1   -1.613e+00  7.214e-01  -2.235 0.025779 *  
## BasementUnFinSF   -2.296e-02  8.351e-03  -2.750 0.006153 ** 
## BasementUnFinSF.1  2.665e+00  9.546e-01   2.792 0.005416 ** 
## BasementSF.1      -3.522e+00  1.684e+00  -2.092 0.036883 *  
## GarageSF           2.694e-02  6.250e-03   4.310 1.92e-05 ***
## WoodDeckSF         1.422e-02  8.330e-03   1.707 0.088375 .  
## EnclosedPorchYN   -1.294e+02  3.294e+01  -3.930 9.55e-05 ***
## EnclosedPorchSF.1  3.252e+01  8.635e+00   3.766 0.000183 ***
## ScreenPorchSF.1    3.378e+01  1.623e+01   2.081 0.037846 *  
## ScreenPorchYN     -1.307e+02  7.285e+01  -1.795 0.073236 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.37 on 572 degrees of freedom
## Multiple R-squared:  0.901,  Adjusted R-squared:  0.8963 
## F-statistic: 192.7 on 27 and 572 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for the backward model on MyAmesData2.
summary(BackwardMod2)
## 
## Call:
## lm(formula = Price ~ LotArea + Quality + Condition + YearBuilt + 
##     YearRemodel + BasementFinSF + BasementUnFinSF + BasementSF + 
##     FirstSF + SecondSF + GroundSF + BasementFBath + HalfBath + 
##     Bedroom + TotalRooms + GarageCars + OpenPorchSF + ScreenPorchSF + 
##     LotFrontage.1 + LotArea.1 + Quality.1 + YearRemodel.1 + BasementFinSF.1 + 
##     GroundSF.1 + BasementFBath.1 + HalfBath.1 + Bedroom.1 + Fireplaces.1 + 
##     GarageCars.1 + GarageSF.1 + WoodDeckSF.1 + EnclosedPorchSF.1 + 
##     ScreenPorchSF.1 + ScreenPorchYN + EnclosedPorchYN, data = MyAmesData2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -106.38  -13.67   -1.77   12.62   96.69 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.302e+06  1.975e+05   6.596 9.76e-11 ***
## LotArea            1.829e-04  1.099e-04   1.665 0.096506 .  
## Quality            3.920e+01  4.276e+00   9.166  < 2e-16 ***
## Condition          7.654e+00  1.040e+00   7.359 6.57e-13 ***
## YearBuilt          5.061e-01  6.006e-02   8.427 2.97e-16 ***
## YearRemodel        9.998e+01  1.513e+01   6.606 9.13e-11 ***
## BasementFinSF      1.852e-02  6.913e-03   2.679 0.007596 ** 
## BasementUnFinSF   -1.185e-02  7.454e-03  -1.590 0.112324    
## BasementSF         3.338e-02  8.069e-03   4.137 4.05e-05 ***
## FirstSF            4.340e-02  2.013e-02   2.157 0.031463 *  
## SecondSF           3.992e-02  1.986e-02   2.010 0.044933 *  
## GroundSF           5.063e-02  2.173e-02   2.330 0.020139 *  
## BasementFBath      6.971e+01  2.528e+01   2.758 0.006013 ** 
## HalfBath          -1.207e+02  2.567e+01  -4.703 3.23e-06 ***
## Bedroom           -1.587e+01  6.221e+00  -2.551 0.011006 *  
## TotalRooms         3.669e+00  1.330e+00   2.760 0.005976 ** 
## GarageCars         5.673e+01  1.090e+01   5.204 2.74e-07 ***
## OpenPorchSF        4.170e-02  1.486e-02   2.805 0.005198 ** 
## ScreenPorchSF     -1.940e-01  1.191e-01  -1.629 0.103802    
## LotFrontage.1      1.356e+00  6.243e-01   2.173 0.030230 *  
## LotArea.1          9.032e+00  2.871e+00   3.146 0.001742 ** 
## Quality.1         -1.498e+02  2.675e+01  -5.603 3.30e-08 ***
## YearRemodel.1     -1.977e+05  2.996e+04  -6.600 9.52e-11 ***
## BasementFinSF.1   -1.194e+00  6.705e-01  -1.781 0.075394 .  
## GroundSF.1        -8.371e+01  1.472e+01  -5.685 2.10e-08 ***
## BasementFBath.1   -9.977e+01  3.724e+01  -2.679 0.007595 ** 
## HalfBath.1         1.842e+02  3.826e+01   4.814 1.90e-06 ***
## Bedroom.1          4.612e+01  2.087e+01   2.209 0.027543 *  
## Fireplaces.1       9.974e+00  3.107e+00   3.210 0.001403 ** 
## GarageCars.1      -1.548e+02  3.337e+01  -4.639 4.35e-06 ***
## GarageSF.1         9.881e+00  2.533e+00   3.900 0.000108 ***
## WoodDeckSF.1       7.201e-01  4.126e-01   1.745 0.081494 .  
## EnclosedPorchSF.1  8.166e+00  3.544e+00   2.304 0.021581 *  
## ScreenPorchSF.1    3.989e+01  2.512e+01   1.588 0.112876    
## ScreenPorchYN     -1.601e+02  1.087e+02  -1.473 0.141268    
## EnclosedPorchYN   -3.910e+01  1.747e+01  -2.239 0.025569 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.98 on 564 degrees of freedom
## Multiple R-squared:  0.9132, Adjusted R-squared:  0.9078 
## F-statistic: 169.4 on 35 and 564 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for the forward model on MyAmesData2.
summary(ForwardMod2)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     LotArea.1 + Quality.1 + Condition.1 + BasementSF + GroundSF.1 + 
##     GarageCars + PorchSF + YearBuilt.1 + Fireplaces + LotFrontage.1 + 
##     OpenPorchSF + LotArea + TotalRooms.1 + Condition + FullBath + 
##     BasementFinSF.1 + BasementUnFinSF + BasementUnFinSF.1 + BasementSF.1 + 
##     GarageSF + WoodDeckSF + GarageCars.1 + GarageSF.1 + Bedroom + 
##     Bedroom.1 + TotalRooms, data = MyAmesData2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -167.286  -12.787   -2.027   12.179   98.894 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.421e+05  6.676e+04   3.626 0.000313 ***
## Quality            3.946e+01  4.755e+00   8.298 7.70e-16 ***
## GroundSF           7.707e-02  1.242e-02   6.208 1.03e-09 ***
## BasementFinSF      1.642e-02  7.395e-03   2.221 0.026758 *  
## YearBuilt          1.955e+01  5.195e+00   3.763 0.000185 ***
## LotArea.1          1.034e+01  3.021e+00   3.423 0.000664 ***
## Quality.1         -1.531e+02  3.013e+01  -5.083 5.06e-07 ***
## Condition.1       -2.213e+01  3.489e+01  -0.634 0.526265    
## BasementSF         4.130e-02  8.301e-03   4.976 8.62e-07 ***
## GroundSF.1        -6.046e+01  1.870e+01  -3.233 0.001295 ** 
## GarageCars         5.709e+01  1.480e+01   3.857 0.000128 ***
## PorchSF            3.796e-02  1.323e-02   2.869 0.004276 ** 
## YearBuilt.1       -3.693e+04  1.015e+04  -3.640 0.000298 ***
## Fireplaces         6.974e+00  2.067e+00   3.375 0.000789 ***
## LotFrontage.1      1.865e+00  6.592e-01   2.830 0.004821 ** 
## OpenPorchSF        4.818e-02  1.566e-02   3.077 0.002195 ** 
## LotArea            2.715e-04  1.069e-04   2.539 0.011374 *  
## TotalRooms.1      -5.687e+01  4.804e+01  -1.184 0.236992    
## Condition          1.236e+01  5.513e+00   2.243 0.025290 *  
## FullBath          -5.869e+00  3.035e+00  -1.933 0.053677 .  
## BasementFinSF.1   -1.528e+00  7.217e-01  -2.117 0.034690 *  
## BasementUnFinSF   -1.999e-02  8.353e-03  -2.393 0.017031 *  
## BasementUnFinSF.1  1.965e+00  9.706e-01   2.024 0.043406 *  
## BasementSF.1      -3.380e+00  1.685e+00  -2.006 0.045305 *  
## GarageSF          -2.672e-03  1.290e-02  -0.207 0.835988    
## WoodDeckSF         8.927e-03  8.337e-03   1.071 0.284693    
## GarageCars.1      -1.552e+02  4.132e+01  -3.757 0.000190 ***
## GarageSF.1         1.117e+01  3.242e+00   3.444 0.000615 ***
## Bedroom           -1.815e+01  6.646e+00  -2.730 0.006523 ** 
## Bedroom.1          5.088e+01  2.209e+01   2.304 0.021590 *  
## TotalRooms         1.174e+01  6.524e+00   1.799 0.072535 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.28 on 569 degrees of freedom
## Multiple R-squared:  0.9022, Adjusted R-squared:  0.897 
## F-statistic: 174.9 on 30 and 569 DF,  p-value: < 2.2e-16

Try to see if the regular or log version of a predictor has a higher correlation with price. We only want to include either the original predictor or the log() version of that predictor, not both. The above models sometimes included both the original predictor and the log() version of that predictor. This is fixed below.

# Build MyAmesData3: start from column 1 of MyAmesData2 and, for each
# predictor, keep either the raw column or its log(x + 1) companion --
# whichever has the larger absolute correlation with Price.
# (The two correlation assignments that used to precede the loop were dead
# code: both values were overwritten on the first iteration before being read.)
MyAmesData3 <- MyAmesData2[1]

# Columns 2:27 of MyAmesData2 are the raw predictors; column 26 + i holds the
# log(x + 1) version of column i.
for (i in 2:27) {
  # cor() against a one-column data frame returns a 1x1 matrix; [1] pulls out
  # the single value.
  corLinear <- abs(cor(MyAmesData2$Price, MyAmesData2[i])[1])
  corLog <- abs(cor(MyAmesData2$Price, MyAmesData2[26 + i])[1])

  # The raw column wins only on a strictly larger correlation; ties go to
  # the log version.
  if (corLinear > corLog) {
    MyAmesData3[i] <- MyAmesData2[i]
  } else {
    MyAmesData3[i] <- MyAmesData2[26 + i]
  }
}

# 0/1 porch indicators, computed from the raw square-footage columns.
MyAmesData3$ScreenPorchYN <- (MyAmesData2$ScreenPorchSF != 0) * 1
MyAmesData3$EnclosedPorchYN <- (MyAmesData2$EnclosedPorchSF != 0) * 1
MyAmesData3$PorchYN <- (MyAmesData2$PorchSF != 0) * 1

Try selection methods with new MyAmesData3

# Backward elimination over every predictor in MyAmesData3.
# The full model's residual variance is handed to step() as `scale`.
Full2 <- lm(Price ~ ., data = MyAmesData3)
MSE <- summary(Full2)$sigma^2
BackwardMod2 <- step(Full2, trace = FALSE, scale = MSE)
BackwardMod2
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea.1 + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + FirstSF + 
##     SecondSF + Bedroom + TotalRooms + Fireplaces.1 + GarageSF + 
##     OpenPorchSF.1 + EnclosedPorchSF.1 + EnclosedPorchYN + PorchYN, 
##     data = MyAmesData3)
## 
## Coefficients:
##       (Intercept)        LotFrontage          LotArea.1  
##        -1.520e+03          1.137e-01          1.521e+01  
##           Quality          Condition          YearBuilt  
##         1.655e+01          6.768e+00          5.268e-01  
##       YearRemodel      BasementFinSF         BasementSF  
##         1.281e-01          2.211e-02          1.673e-02  
##           FirstSF           SecondSF            Bedroom  
##         4.822e-02          4.393e-02         -9.503e+00  
##        TotalRooms       Fireplaces.1           GarageSF  
##         4.873e+00          6.775e+00          3.225e-02  
##     OpenPorchSF.1  EnclosedPorchSF.1    EnclosedPorchYN  
##         9.655e-01          7.058e+00         -3.742e+01  
##           PorchYN  
##         9.346e+00
# Forward selection: start from the intercept-only model and add terms,
# with Full2 as the upper bound of the search.
none2 <- lm(Price ~ 1, data = MyAmesData3)
ForwardMod2 <- step(
  none2,
  scope = list(upper = Full2),
  scale = MSE,
  direction = "forward",
  trace = FALSE
)
ForwardMod2
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     LotArea.1 + GarageSF + Condition + BasementSF + Bedroom + 
##     LotFrontage + PorchSF.1 + TotalRooms + Fireplaces.1 + YearRemodel + 
##     OpenPorchSF.1, data = MyAmesData3)
## 
## Coefficients:
##   (Intercept)        Quality       GroundSF  BasementFinSF      YearBuilt  
##    -1.578e+03      1.619e+01      4.427e-02      2.258e-02      5.483e-01  
##     LotArea.1       GarageSF      Condition     BasementSF        Bedroom  
##     1.589e+01      3.485e-02      6.753e+00      1.900e-02     -9.262e+00  
##   LotFrontage      PorchSF.1     TotalRooms   Fireplaces.1    YearRemodel  
##     1.153e-01      1.625e+00      4.508e+00      7.913e+00      1.345e-01  
## OpenPorchSF.1  
##     9.537e-01
# Stepwise selection: same starting model and upper scope as the forward
# search, but `direction` is left at step()'s default.
StepwiseMod2 <- step(
  none2,
  scope = list(upper = Full2),
  scale = MSE,
  trace = FALSE
)
StepwiseMod2
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     LotArea.1 + GarageSF + Condition + BasementSF + Bedroom + 
##     LotFrontage + PorchSF.1 + TotalRooms + Fireplaces.1 + YearRemodel + 
##     OpenPorchSF.1, data = MyAmesData3)
## 
## Coefficients:
##   (Intercept)        Quality       GroundSF  BasementFinSF      YearBuilt  
##    -1.578e+03      1.619e+01      4.427e-02      2.258e-02      5.483e-01  
##     LotArea.1       GarageSF      Condition     BasementSF        Bedroom  
##     1.589e+01      3.485e-02      6.753e+00      1.900e-02     -9.262e+00  
##   LotFrontage      PorchSF.1     TotalRooms   Fireplaces.1    YearRemodel  
##     1.153e-01      1.625e+00      4.508e+00      7.913e+00      1.345e-01  
## OpenPorchSF.1  
##     9.537e-01
summary(StepwiseMod2)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     LotArea.1 + GarageSF + Condition + BasementSF + Bedroom + 
##     LotFrontage + PorchSF.1 + TotalRooms + Fireplaces.1 + YearRemodel + 
##     OpenPorchSF.1, data = MyAmesData3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -164.422  -15.679   -3.107   12.135  133.895 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.578e+03  1.509e+02 -10.452  < 2e-16 ***
## Quality        1.619e+01  1.379e+00  11.738  < 2e-16 ***
## GroundSF       4.427e-02  5.013e-03   8.830  < 2e-16 ***
## BasementFinSF  2.258e-02  3.038e-03   7.430 3.88e-13 ***
## YearBuilt      5.483e-01  6.590e-02   8.321 6.20e-16 ***
## LotArea.1      1.589e+01  2.572e+00   6.177 1.22e-09 ***
## GarageSF       3.485e-02  6.808e-03   5.118 4.20e-07 ***
## Condition      6.753e+00  1.179e+00   5.729 1.62e-08 ***
## BasementSF     1.900e-02  4.009e-03   4.739 2.70e-06 ***
## Bedroom       -9.262e+00  2.074e+00  -4.467 9.54e-06 ***
## LotFrontage    1.153e-01  3.616e-02   3.189  0.00150 ** 
## PorchSF.1      1.625e+00  5.858e-01   2.774  0.00572 ** 
## TotalRooms     4.508e+00  1.541e+00   2.926  0.00357 ** 
## Fireplaces.1   7.913e+00  3.540e+00   2.235  0.02577 *  
## YearRemodel    1.345e-01  7.779e-02   1.729  0.08441 .  
## OpenPorchSF.1  9.537e-01  6.121e-01   1.558  0.11975    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.31 on 584 degrees of freedom
## Multiple R-squared:  0.873,  Adjusted R-squared:  0.8698 
## F-statistic: 267.7 on 15 and 584 DF,  p-value: < 2.2e-16
summary(BackwardMod2)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea.1 + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + FirstSF + 
##     SecondSF + Bedroom + TotalRooms + Fireplaces.1 + GarageSF + 
##     OpenPorchSF.1 + EnclosedPorchSF.1 + EnclosedPorchYN + PorchYN, 
##     data = MyAmesData3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -153.629  -15.617   -2.989   11.825  133.687 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -1.520e+03  1.535e+02  -9.904  < 2e-16 ***
## LotFrontage        1.137e-01  3.616e-02   3.143  0.00176 ** 
## LotArea.1          1.521e+01  2.610e+00   5.827 9.35e-09 ***
## Quality            1.655e+01  1.403e+00  11.796  < 2e-16 ***
## Condition          6.768e+00  1.177e+00   5.748 1.46e-08 ***
## YearBuilt          5.268e-01  6.803e-02   7.744 4.32e-14 ***
## YearRemodel        1.281e-01  7.773e-02   1.648  0.09996 .  
## BasementFinSF      2.211e-02  3.038e-03   7.278 1.11e-12 ***
## BasementSF         1.673e-02  5.077e-03   3.295  0.00104 ** 
## FirstSF            4.822e-02  6.354e-03   7.590 1.28e-13 ***
## SecondSF           4.393e-02  5.141e-03   8.545  < 2e-16 ***
## Bedroom           -9.503e+00  2.084e+00  -4.560 6.23e-06 ***
## TotalRooms         4.873e+00  1.514e+00   3.219  0.00136 ** 
## Fireplaces.1       6.775e+00  3.577e+00   1.894  0.05868 .  
## GarageSF           3.225e-02  6.853e-03   4.706 3.16e-06 ***
## OpenPorchSF.1      9.655e-01  6.121e-01   1.577  0.11526    
## EnclosedPorchSF.1  7.058e+00  4.049e+00   1.743  0.08186 .  
## EnclosedPorchYN   -3.742e+01  2.041e+01  -1.834  0.06723 .  
## PorchYN            9.346e+00  4.089e+00   2.286  0.02262 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.21 on 581 degrees of freedom
## Multiple R-squared:  0.8746, Adjusted R-squared:  0.8707 
## F-statistic: 225.1 on 18 and 581 DF,  p-value: < 2.2e-16
summary(ForwardMod2)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + YearBuilt + 
##     LotArea.1 + GarageSF + Condition + BasementSF + Bedroom + 
##     LotFrontage + PorchSF.1 + TotalRooms + Fireplaces.1 + YearRemodel + 
##     OpenPorchSF.1, data = MyAmesData3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -164.422  -15.679   -3.107   12.135  133.895 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.578e+03  1.509e+02 -10.452  < 2e-16 ***
## Quality        1.619e+01  1.379e+00  11.738  < 2e-16 ***
## GroundSF       4.427e-02  5.013e-03   8.830  < 2e-16 ***
## BasementFinSF  2.258e-02  3.038e-03   7.430 3.88e-13 ***
## YearBuilt      5.483e-01  6.590e-02   8.321 6.20e-16 ***
## LotArea.1      1.589e+01  2.572e+00   6.177 1.22e-09 ***
## GarageSF       3.485e-02  6.808e-03   5.118 4.20e-07 ***
## Condition      6.753e+00  1.179e+00   5.729 1.62e-08 ***
## BasementSF     1.900e-02  4.009e-03   4.739 2.70e-06 ***
## Bedroom       -9.262e+00  2.074e+00  -4.467 9.54e-06 ***
## LotFrontage    1.153e-01  3.616e-02   3.189  0.00150 ** 
## PorchSF.1      1.625e+00  5.858e-01   2.774  0.00572 ** 
## TotalRooms     4.508e+00  1.541e+00   2.926  0.00357 ** 
## Fireplaces.1   7.913e+00  3.540e+00   2.235  0.02577 *  
## YearRemodel    1.345e-01  7.779e-02   1.729  0.08441 .  
## OpenPorchSF.1  9.537e-01  6.121e-01   1.558  0.11975    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.31 on 584 degrees of freedom
## Multiple R-squared:  0.873,  Adjusted R-squared:  0.8698 
## F-statistic: 267.7 on 15 and 584 DF,  p-value: < 2.2e-16
plot(ForwardMod2)

Clearly, even with the transformations to the predictors, the independence-of-errors condition is not satisfied. So we are going to take the log of Price and repeat the same steps.

# Build MyAmesData4: same predictor choice as for MyAmesData3, but with
# log(Price) as the response. The response column keeps the name "Price",
# so every model fit on MyAmesData4 is a model for log(Price).
# NOTE(review): assumes columns 2:27 of MyAmesData2 are the original
# predictors and columns 28:53 are their log versions in the same order --
# confirm against the code that builds MyAmesData2.
MyAmesData4 <- log(MyAmesData2[1])

# Loop-invariant: the log response every candidate column is compared against.
logPrice <- log(MyAmesData2$Price)

for (i in 2:27) {
  CorLinear <- abs(cor(logPrice, MyAmesData2[i])[1])
  CorLog <- abs(cor(logPrice, MyAmesData2[26 + i])[1])

  # The original column is kept only when it is strictly better; ties go to
  # the log version.
  chosen <- if (CorLinear > CorLog) i else 26 + i
  MyAmesData4[i] <- MyAmesData2[chosen]
}

# 0/1 indicators: 1 when the corresponding square-footage column is nonzero.
MyAmesData4$ScreenPorchYN <- (MyAmesData2$ScreenPorchSF != 0) * 1
MyAmesData4$EnclosedPorchYN <- (MyAmesData2$EnclosedPorchSF != 0) * 1
MyAmesData4$PorchYN <- (MyAmesData2$PorchSF != 0) * 1

Backward:

# Backward elimination with log(Price) as the response (MyAmesData4).
# The full model's residual variance is handed to step() as `scale`.
Full3 <- lm(Price ~ ., data = MyAmesData4)
MSE <- summary(Full3)$sigma^2
BackMod3 <- step(Full3, trace = FALSE, scale = MSE)

Forward:

# Forward selection from the intercept-only model, bounded above by Full3.
none3 <- lm(Price ~ 1, data = MyAmesData4)
ForwardMod3 <- step(
  none3,
  scope = list(upper = Full3),
  scale = MSE,
  direction = "forward",
  trace = FALSE
)

Stepwise Regression:

# Stepwise selection: same start and upper scope as the forward search, with
# `direction` left at step()'s default.
StepMod3 <- step(
  none3,
  scope = list(upper = Full3),
  scale = MSE,
  trace = FALSE
)
# Coefficient table and fit statistics for the backward-selection model.
summary(BackMod3)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea.1 + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementUnFinSF.1 + 
##     BasementSF + FirstSF.1 + SecondSF + GroundSF.1 + Bedroom + 
##     TotalRooms.1 + Fireplaces.1 + GarageSF + OpenPorchSF.1 + 
##     EnclosedPorchSF.1 + EnclosedPorchYN + PorchYN, data = MyAmesData4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.54687 -0.05956  0.00055  0.06623  0.40955 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -8.134e+00  7.944e-01 -10.240  < 2e-16 ***
## LotFrontage        2.452e-04  1.716e-04   1.428 0.153702    
## LotArea.1          7.638e-02  1.253e-02   6.095 2.00e-09 ***
## Quality            8.973e-02  6.811e-03  13.175  < 2e-16 ***
## Condition          5.693e-02  5.588e-03  10.188  < 2e-16 ***
## YearBuilt          3.544e-03  3.264e-04  10.856  < 2e-16 ***
## YearRemodel        8.624e-04  3.677e-04   2.346 0.019327 *  
## BasementFinSF      9.409e-05  1.707e-05   5.512 5.35e-08 ***
## BasementUnFinSF.1  7.497e-03  4.219e-03   1.777 0.076073 .  
## BasementSF         7.636e-05  2.986e-05   2.557 0.010812 *  
## FirstSF.1          2.163e-01  6.080e-02   3.557 0.000405 ***
## SecondSF           1.391e-04  4.475e-05   3.108 0.001976 ** 
## GroundSF.1         1.393e-01  7.390e-02   1.884 0.060002 .  
## Bedroom           -2.013e-02  1.012e-02  -1.989 0.047160 *  
## TotalRooms.1       9.804e-02  5.425e-02   1.807 0.071282 .  
## Fireplaces.1       6.186e-02  1.705e-02   3.628 0.000311 ***
## GarageSF           1.290e-04  3.258e-05   3.960 8.42e-05 ***
## OpenPorchSF.1      5.618e-03  2.934e-03   1.915 0.056032 .  
## EnclosedPorchSF.1  6.875e-02  1.922e-02   3.578 0.000376 ***
## EnclosedPorchYN   -3.529e-01  9.676e-02  -3.647 0.000289 ***
## PorchYN            3.929e-02  1.941e-02   2.024 0.043378 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1289 on 579 degrees of freedom
## Multiple R-squared:  0.9029, Adjusted R-squared:  0.8995 
## F-statistic: 269.1 on 20 and 579 DF,  p-value: < 2.2e-16
summary(ForwardMod3)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF.1 + YearBuilt + LotArea.1 + 
##     Condition + BasementFinSF + BasementSF + GarageSF + Fireplaces.1 + 
##     YearRemodel + PorchSF.1 + PorchYN + OpenPorchSF.1, data = MyAmesData4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.54412 -0.06434 -0.00266  0.07028  0.43175 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.229e+00  7.537e-01 -10.918  < 2e-16 ***
## Quality        9.222e-02  6.643e-03  13.882  < 2e-16 ***
## GroundSF.1     3.638e-01  2.396e-02  15.183  < 2e-16 ***
## YearBuilt      3.373e-03  3.138e-04  10.750  < 2e-16 ***
## LotArea.1      8.564e-02  1.191e-02   7.188 2.01e-12 ***
## Condition      5.421e-02  5.592e-03   9.696  < 2e-16 ***
## BasementFinSF  7.789e-05  1.429e-05   5.450 7.43e-08 ***
## BasementSF     1.172e-04  1.917e-05   6.112 1.79e-09 ***
## GarageSF       1.490e-04  3.230e-05   4.615 4.84e-06 ***
## Fireplaces.1   6.837e-02  1.686e-02   4.056 5.66e-05 ***
## YearRemodel    1.078e-03  3.631e-04   2.968  0.00312 ** 
## PorchSF.1      4.979e-02  1.784e-02   2.791  0.00542 ** 
## PorchYN       -2.194e-01  9.038e-02  -2.427  0.01553 *  
## OpenPorchSF.1  4.890e-03  2.943e-03   1.662  0.09712 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1309 on 586 degrees of freedom
## Multiple R-squared:  0.8986, Adjusted R-squared:  0.8964 
## F-statistic: 399.5 on 13 and 586 DF,  p-value: < 2.2e-16
summary(StepMod3)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF.1 + YearBuilt + LotArea.1 + 
##     Condition + BasementFinSF + BasementSF + GarageSF + Fireplaces.1 + 
##     YearRemodel + PorchSF.1 + PorchYN + OpenPorchSF.1, data = MyAmesData4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.54412 -0.06434 -0.00266  0.07028  0.43175 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.229e+00  7.537e-01 -10.918  < 2e-16 ***
## Quality        9.222e-02  6.643e-03  13.882  < 2e-16 ***
## GroundSF.1     3.638e-01  2.396e-02  15.183  < 2e-16 ***
## YearBuilt      3.373e-03  3.138e-04  10.750  < 2e-16 ***
## LotArea.1      8.564e-02  1.191e-02   7.188 2.01e-12 ***
## Condition      5.421e-02  5.592e-03   9.696  < 2e-16 ***
## BasementFinSF  7.789e-05  1.429e-05   5.450 7.43e-08 ***
## BasementSF     1.172e-04  1.917e-05   6.112 1.79e-09 ***
## GarageSF       1.490e-04  3.230e-05   4.615 4.84e-06 ***
## Fireplaces.1   6.837e-02  1.686e-02   4.056 5.66e-05 ***
## YearRemodel    1.078e-03  3.631e-04   2.968  0.00312 ** 
## PorchSF.1      4.979e-02  1.784e-02   2.791  0.00542 ** 
## PorchYN       -2.194e-01  9.038e-02  -2.427  0.01553 *  
## OpenPorchSF.1  4.890e-03  2.943e-03   1.662  0.09712 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1309 on 586 degrees of freedom
## Multiple R-squared:  0.8986, Adjusted R-squared:  0.8964 
## F-statistic: 399.5 on 13 and 586 DF,  p-value: < 2.2e-16
plot(ForwardMod3)

The R-squared values are marginally better: before they were about .87, and now they are around .89.

#Forward and Stepwise are the same model 
vif(BackMod3)
##       LotFrontage         LotArea.1           Quality         Condition 
##          1.121446          1.536948          3.316212          1.481949 
##         YearBuilt       YearRemodel     BasementFinSF BasementUnFinSF.1 
##          3.753977          2.150375          1.996687          2.021314 
##        BasementSF         FirstSF.1          SecondSF        GroundSF.1 
##          4.991997         12.615737         13.719947         21.702070 
##           Bedroom      TotalRooms.1      Fireplaces.1          GarageSF 
##          2.456902          4.473049          1.556870          1.787843 
##     OpenPorchSF.1 EnclosedPorchSF.1   EnclosedPorchYN           PorchYN 
##          1.458297         44.903726         47.324415          2.605627
vif(ForwardMod3)
##       Quality    GroundSF.1     YearBuilt     LotArea.1     Condition 
##      3.059337      2.212522      3.363803      1.347010      1.439036 
## BasementFinSF    BasementSF      GarageSF  Fireplaces.1   YearRemodel 
##      1.357394      1.995782      1.703536      1.475296      2.034132 
##     PorchSF.1       PorchYN OpenPorchSF.1 
##     54.125663     54.800962      1.422760

We have high vif values in both due to the inclusion of the indicator variables that are highly correlated with the original variables they were derived from. We’ll remove those and make the comparison again. We opt to remove the indicator variable rather than the original variable to remain faithful to the original data as much as possible.

# Forward-selection model refit without the PorchYN indicator.
ForwardMod3a <- lm(
  Price ~ Quality + GroundSF.1 + YearBuilt + LotArea.1 + Condition +
    BasementFinSF + BasementSF + GarageSF + Fireplaces.1 + YearRemodel +
    PorchSF.1 + OpenPorchSF.1,
  data = MyAmesData4
)

# Backward-selection model refit without the EnclosedPorchYN indicator
# (PorchYN is still part of this formula).
BackwardsMod3a <- lm(
  Price ~ LotFrontage + LotArea.1 + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementUnFinSF.1 + BasementSF +
    FirstSF.1 + SecondSF + GroundSF.1 + Bedroom + TotalRooms.1 +
    Fireplaces.1 + GarageSF + OpenPorchSF.1 + EnclosedPorchSF.1 + PorchYN,
  data = MyAmesData4
)

summary(ForwardMod3a)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF.1 + YearBuilt + LotArea.1 + 
##     Condition + BasementFinSF + BasementSF + GarageSF + Fireplaces.1 + 
##     YearRemodel + PorchSF.1 + OpenPorchSF.1, data = MyAmesData4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.54498 -0.06373 -0.00087  0.07082  0.46924 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.409e+00  7.532e-01 -11.164  < 2e-16 ***
## Quality        9.027e-02  6.621e-03  13.633  < 2e-16 ***
## GroundSF.1     3.672e-01  2.402e-02  15.289  < 2e-16 ***
## YearBuilt      3.475e-03  3.123e-04  11.126  < 2e-16 ***
## LotArea.1      8.634e-02  1.196e-02   7.219 1.63e-12 ***
## Condition      5.504e-02  5.604e-03   9.822  < 2e-16 ***
## BasementFinSF  7.948e-05  1.434e-05   5.544 4.47e-08 ***
## BasementSF     1.197e-04  1.923e-05   6.225 9.17e-10 ***
## GarageSF       1.518e-04  3.241e-05   4.683 3.51e-06 ***
## Fireplaces.1   6.975e-02  1.692e-02   4.123 4.28e-05 ***
## YearRemodel    1.052e-03  3.645e-04   2.886  0.00404 ** 
## PorchSF.1      7.031e-03  2.796e-03   2.514  0.01219 *  
## OpenPorchSF.1  4.963e-03  2.955e-03   1.680  0.09358 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1315 on 587 degrees of freedom
## Multiple R-squared:  0.8976, Adjusted R-squared:  0.8955 
## F-statistic: 428.8 on 12 and 587 DF,  p-value: < 2.2e-16
summary(BackwardsMod3a)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea.1 + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementUnFinSF.1 + 
##     BasementSF + FirstSF.1 + SecondSF + GroundSF.1 + Bedroom + 
##     TotalRooms.1 + Fireplaces.1 + GarageSF + OpenPorchSF.1 + 
##     EnclosedPorchSF.1 + PorchYN, data = MyAmesData4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.55036 -0.06030 -0.00128  0.06443  0.41341 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -8.260e+00  8.020e-01 -10.299  < 2e-16 ***
## LotFrontage        2.771e-04  1.732e-04   1.599 0.110267    
## LotArea.1          7.739e-02  1.266e-02   6.112 1.80e-09 ***
## Quality            8.747e-02  6.854e-03  12.762  < 2e-16 ***
## Condition          5.714e-02  5.646e-03  10.120  < 2e-16 ***
## YearBuilt          3.622e-03  3.292e-04  11.002  < 2e-16 ***
## YearRemodel        8.362e-04  3.715e-04   2.251 0.024761 *  
## BasementFinSF      9.630e-05  1.724e-05   5.586 3.58e-08 ***
## BasementUnFinSF.1  7.254e-03  4.263e-03   1.702 0.089328 .  
## BasementSF         8.197e-05  3.014e-05   2.720 0.006731 ** 
## FirstSF.1          2.039e-01  6.134e-02   3.325 0.000941 ***
## SecondSF           1.321e-04  4.518e-05   2.924 0.003593 ** 
## GroundSF.1         1.567e-01  7.452e-02   2.103 0.035908 *  
## Bedroom           -1.920e-02  1.022e-02  -1.878 0.060898 .  
## TotalRooms.1       8.573e-02  5.472e-02   1.567 0.117727    
## Fireplaces.1       6.660e-02  1.718e-02   3.876 0.000118 ***
## GarageSF           1.331e-04  3.290e-05   4.045 5.95e-05 ***
## OpenPorchSF.1      5.889e-03  2.964e-03   1.987 0.047412 *  
## EnclosedPorchSF.1  8.668e-04  4.836e-03   0.179 0.857833    
## PorchYN            2.515e-02  1.922e-02   1.309 0.191164    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1303 on 580 degrees of freedom
## Multiple R-squared:  0.9006, Adjusted R-squared:  0.8974 
## F-statistic: 276.7 on 19 and 580 DF,  p-value: < 2.2e-16
vif(ForwardMod3a)
##       Quality    GroundSF.1     YearBuilt     LotArea.1     Condition 
##      3.014506      2.204888      3.304207      1.346221      1.433655 
## BasementFinSF    BasementSF      GarageSF  Fireplaces.1   YearRemodel 
##      1.354543      1.990082      1.701441      1.473637      2.032403 
##     PorchSF.1 OpenPorchSF.1 
##      1.318753      1.422611
vif(BackwardsMod3a)
##       LotFrontage         LotArea.1           Quality         Condition 
##          1.118537          1.536198          3.288753          1.481787 
##         YearBuilt       YearRemodel     BasementFinSF BasementUnFinSF.1 
##          3.737942          2.149550          1.994185          2.020811 
##        BasementSF         FirstSF.1          SecondSF        GroundSF.1 
##          4.978771         12.576704         13.694797         21.611117 
##           Bedroom      TotalRooms.1      Fireplaces.1          GarageSF 
##          2.455339          4.455751          1.547848          1.785758 
##     OpenPorchSF.1 EnclosedPorchSF.1           PorchYN 
##          1.457359          2.785124          2.501627

Our adjusted R-squared values with these remain about the same, but the VIF values are much more reasonable. In the new BackwardsMod3a, EnclosedPorchSF.1 becomes very insignificant, with a p-value of .86. High VIF values in the backward selection model led us to choose the new forward selection model for this part. ForwardMod3a has a slightly lower adjusted R^2 value than BackwardsMod3a (0.8955 vs. 0.8974). There are fewer insignificant predictors in ForwardMod3a than in BackwardsMod3a. The VIF values for ForwardMod3a are much better than those for BackwardsMod3a. We could make adjustments to BackwardsMod3a to resolve the high VIFs and the insignificant p-values, but as we are content with the resulting ForwardMod3a, we will choose to adopt it rather than attempt to save a poor model.

Part 4

plot(ForwardMod3a)

max(abs(ForwardMod3a$residuals))
## [1] 1.544982
which.max(abs(ForwardMod3a$residuals))
## 585 
## 585
#max(abs(ForwardMod1$residuals))
rstandard(ForwardMod3a)[585]
##       585 
## -12.02693
rstudent(ForwardMod3a)[585]
##       585 
## -13.84264
# Studentized residuals against fitted values for ForwardMod3a, with a
# horizontal reference line at zero. The model's response is log(Price)
# (it was fit on MyAmesData4), so the fitted values are on the log scale.
# No `data` argument is needed: both sides of the formula are computed
# directly from the model object.
plot(
  rstudent(ForwardMod3a) ~ fitted(ForwardMod3a),
  xlab = "Fitted log(Price)",
  ylab = "Studentized residual"
)
abline(h = 0)

# Same plot using standardized residuals.
plot(
  rstandard(ForwardMod3a) ~ fitted(ForwardMod3a),
  xlab = "Fitted log(Price)",
  ylab = "Standardized residual"
)
abline(h = 0)

# Linearity check for the multiple regression model: the response column of
# MyAmesData4 plotted against each predictor used in ForwardMod3a.
plot(
  Price ~ Quality + GroundSF.1 + YearBuilt + LotArea.1 + Condition +
    BasementFinSF + BasementSF + GarageSF + Fireplaces.1 + YearRemodel +
    PorchSF.1 + OpenPorchSF.1,
  data = MyAmesData4
)

#Values for the 585 indexed point. Removing this point later
MyAmesData[585,]

After looking at the residuals vs leverage plot, one residual falls just outside of the 0.5 Cook’s Distance boundary. That point has a 1.8 difference between its standardized and studentized residuals, which are both over 10 in absolute value. Looking at log(Price) vs our predictors shows a fairly linear relationship across most predictors. In the cases where it isn’t linear, there are no better curves we could think of including. Linearity is satisfied.

The raw, standardized, and studentized plots all show that the residuals have zero mean because they all appear to be centered around the reference line. The QQ plot appears to show a normal distribution since the bulk of the data falls along the qqline. The residuals vs fitted plot shows independence and constant variance: there is no easily visible pattern to the residuals, which supports independence (the plot attempts to fit a curve, but it remains close to 0 and would not be visible without the curve superimposed, and it is certainly much better than the original model), and there is no fan shape that would indicate non-constant variance.

Part 5

# Drop the observation flagged in Part 4 (row 585, the largest absolute
# residual of ForwardMod3a) and refit the same model without it.
outlierRow <- 585
FinalAmesData <- MyAmesData4[-outlierRow, ]
FinalAmesData

# update() reuses ForwardMod3a's formula and only swaps the data, so the
# final model cannot drift from the model it is being compared against.
FinalMod <- update(ForwardMod3a, data = FinalAmesData)

summary(ForwardMod3a)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF.1 + YearBuilt + LotArea.1 + 
##     Condition + BasementFinSF + BasementSF + GarageSF + Fireplaces.1 + 
##     YearRemodel + PorchSF.1 + OpenPorchSF.1, data = MyAmesData4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.54498 -0.06373 -0.00087  0.07082  0.46924 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.409e+00  7.532e-01 -11.164  < 2e-16 ***
## Quality        9.027e-02  6.621e-03  13.633  < 2e-16 ***
## GroundSF.1     3.672e-01  2.402e-02  15.289  < 2e-16 ***
## YearBuilt      3.475e-03  3.123e-04  11.126  < 2e-16 ***
## LotArea.1      8.634e-02  1.196e-02   7.219 1.63e-12 ***
## Condition      5.504e-02  5.604e-03   9.822  < 2e-16 ***
## BasementFinSF  7.948e-05  1.434e-05   5.544 4.47e-08 ***
## BasementSF     1.197e-04  1.923e-05   6.225 9.17e-10 ***
## GarageSF       1.518e-04  3.241e-05   4.683 3.51e-06 ***
## Fireplaces.1   6.975e-02  1.692e-02   4.123 4.28e-05 ***
## YearRemodel    1.052e-03  3.645e-04   2.886  0.00404 ** 
## PorchSF.1      7.031e-03  2.796e-03   2.514  0.01219 *  
## OpenPorchSF.1  4.963e-03  2.955e-03   1.680  0.09358 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1315 on 587 degrees of freedom
## Multiple R-squared:  0.8976, Adjusted R-squared:  0.8955 
## F-statistic: 428.8 on 12 and 587 DF,  p-value: < 2.2e-16
summary(FinalMod)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF.1 + YearBuilt + LotArea.1 + 
##     Condition + BasementFinSF + BasementSF + GarageSF + Fireplaces.1 + 
##     YearRemodel + PorchSF.1 + OpenPorchSF.1, data = FinalAmesData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68152 -0.06464 -0.00455  0.06490  0.43361 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.666e+00  6.547e-01 -13.237  < 2e-16 ***
## Quality        8.105e-02  5.791e-03  13.994  < 2e-16 ***
## GroundSF.1     3.591e-01  2.088e-02  17.204  < 2e-16 ***
## YearBuilt      3.645e-03  2.716e-04  13.422  < 2e-16 ***
## LotArea.1      9.447e-02  1.041e-02   9.076  < 2e-16 ***
## Condition      5.395e-02  4.870e-03  11.079  < 2e-16 ***
## BasementFinSF  7.860e-05  1.246e-05   6.310 5.50e-10 ***
## BasementSF     1.052e-04  1.674e-05   6.286 6.38e-10 ***
## GarageSF       1.864e-04  2.827e-05   6.593 9.65e-11 ***
## Fireplaces.1   7.226e-02  1.470e-02   4.916 1.15e-06 ***
## YearRemodel    1.036e-03  3.167e-04   3.273  0.00113 ** 
## PorchSF.1      6.518e-03  2.430e-03   2.682  0.00751 ** 
## OpenPorchSF.1  4.772e-03  2.567e-03   1.859  0.06359 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1142 on 586 degrees of freedom
## Multiple R-squared:  0.9175, Adjusted R-squared:  0.9158 
## F-statistic: 542.7 on 12 and 586 DF,  p-value: < 2.2e-16

By removing this point, we saw an increase in adjusted R^2 from 0.8955 to 0.9158. Also, PorchSF.1 became more significant (** instead of *) in the final model. Overall, this is a good improvement and our model fits the data well.

# Predictor values for the house being priced. The ".1" predictors are
# entered as log(value + 1).
newx <- data.frame(
  Quality = 7,
  GroundSF.1 = log(2314 + 1),
  YearBuilt = 1995,
  LotArea.1 = log(11060 + 1),
  Condition = 5,
  BasementFinSF = 0,
  BasementSF = 1150,
  GarageSF = 502,
  Fireplaces.1 = log(1 + 1),
  YearRemodel = 2003,
  PorchSF.1 = log(274 + 1),
  OpenPorchSF.1 = log(274 + 1)
)

# 95% prediction interval from FinalMod, on the model's response scale.
predict(FinalMod, newdata = newx, interval = "prediction", level = 0.95)
##        fit      lwr      upr
## 1 5.510001 5.283118 5.736883
# Lower and upper bounds for a house with these characteristics.
# The two constants are the lwr/upr values copied from the predict output
# above; exp() undoes the log taken on the response.
# NOTE(review): the * 1000 presumably converts Price from thousands of
# dollars to dollars -- confirm against the data dictionary.
exp(5.283118)*1000
## [1] 196983.1
exp(5.736883)*1000
## [1] 310096.3

We predict with 95% confidence that a house with these characteristics will cost between 196983.1 and 310096.3 USD (a 95% prediction interval).